In [ ]:
import rasterio
from rasterio.mask import mask
import geopandas as gpd
import numpy as np
from shapely.geometry import mapping
import geopandas as gpd
In [ ]:
import os # we need os to do some basic file operations

sentinal_fp = "../RandomForest/data_a/sentinel/"
# find every file in the sentinal_fp directory
sentinal_band_paths = [os.path.join(sentinal_fp, f) for f in os.listdir(sentinal_fp) if os.path.isfile(os.path.join(sentinal_fp, f))]
sentinal_band_paths.sort()
sentinal_band_paths
Out[ ]:
['../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B02_10m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B03_10m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B04_10m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B05_20m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B06_20m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B07_20m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B08_10m_extent.tif',
 '../RandomForest/data_a/sentinel/T32UPU_20210814T102031_B8A_20m_extent.tif']
In [ ]:
file_path = 'C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg'

# Load the dataset
data = gpd.read_file(file_path)
data
Out[ ]:
country fua_name fua_code code_2018 class_2018 prod_date identifier perimeter area comment Pop2018 geometry
0 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 425-DE033L1 155.888916 1185.135544 None 9 MULTIPOLYGON (((4387596.158 2801154.636, 43875...
1 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 785-DE033L1 204.742805 2617.844445 None 53 MULTIPOLYGON (((4385599.462 2805744.357, 43856...
2 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 751-DE033L1 397.305451 9451.982988 None 193 MULTIPOLYGON (((4389167.596 2805621.979, 43890...
3 DE Augsburg DE033L1 11210 Discontinuous dense urban fabric (S.L. : 50% -... 2020-09 5156-DE033L1 2683.734912 35662.775997 None 166 MULTIPOLYGON (((4394320.820 2808000.000, 43943...
4 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 1986-DE033L1 297.148595 4588.045376 None 60 MULTIPOLYGON (((4389395.144 2807242.564, 43893...
... ... ... ... ... ... ... ... ... ... ... ... ...
25378 DE Augsburg DE033L1 31000 Forests 2020-09 24837-DE033L1 682.883091 27090.306391 None 0 MULTIPOLYGON (((4411181.690 2815311.223, 44111...
25379 DE Augsburg DE033L1 32000 Herbaceous vegetation associations (natural gr... 2020-09 25099-DE033L1 485.354917 11090.189969 None 0 MULTIPOLYGON (((4397210.659 2824962.333, 43972...
25380 DE Augsburg DE033L1 32000 Herbaceous vegetation associations (natural gr... 2020-09 25134-DE033L1 528.446541 11505.819504 None 0 MULTIPOLYGON (((4397105.659 2832772.333, 43971...
25381 DE Augsburg DE033L1 50000 Water 2020-09 25235-DE033L1 568.167123 10591.167983 None 0 MULTIPOLYGON (((4388260.684 2807000.000, 43882...
25382 DE Augsburg DE033L1 23000 Pastures 2020-09 23246-DE033L1 763.153970 18744.576748 None 0 MULTIPOLYGON (((4391115.659 2828972.333, 43911...

25383 rows × 12 columns

In [ ]:
data.plot()
Out[ ]:
<Axes: >
No description has been provided for this image
In [ ]:
# create a products directory within the data dir which won't be uploaded to Github
img_dir = '../RandomForest/data_a/products2/'

# check to see if the dir it exists, if not, create it
if not os.path.exists(img_dir):
    os.makedirs(img_dir)

# filepath for image we're writing out
img_fp = img_dir + 'sentinel_bands.tif'

# Read metadata of first file and assume all other bands are the same
with rasterio.open(sentinal_band_paths[0]) as src0:
    meta = src0.meta

# Update metadata to reflect the number of layers
meta.update(count = len(sentinal_band_paths))

# Read each layer and write it to stack
with rasterio.open(img_fp, 'w', **meta) as dst:
    for id, layer in enumerate(sentinal_band_paths, start=1):
        with rasterio.open(layer) as src1:
            dst.write_band(id, src1.read(1))
---------------------------------------------------------------------------
CPLE_AppDefinedError                      Traceback (most recent call last)
Cell In[66], line 19
     16 meta.update(count = len(sentinal_band_paths))
     18 # Read each layer and write it to stack
---> 19 with rasterio.open(img_fp, 'w', **meta) as dst:
     20     for id, layer in enumerate(sentinal_band_paths, start=1):
     21         with rasterio.open(layer) as src1:

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\env.py:451, in ensure_env_with_credentials.<locals>.wrapper(*args, **kwds)
    448     session = DummySession()
    450 with env_ctor(session=session):
--> 451     return f(*args, **kwds)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\__init__.py:314, in open(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, **kwargs)
    312 writer = get_writer_for_driver(driver)
    313 if writer is not None:
--> 314     dataset = writer(
    315         path,
    316         mode,
    317         driver=driver,
    318         width=width,
    319         height=height,
    320         count=count,
    321         crs=crs,
    322         transform=transform,
    323         dtype=dtype,
    324         nodata=nodata,
    325         sharing=sharing,
    326         **kwargs
    327     )
    328 else:
    329     raise DriverCapabilityError(
    330         "Writer does not exist for driver: %s" % str(driver)
    331     )

File rasterio\_io.pyx:1430, in rasterio._io.DatasetWriterBase.__init__()

File rasterio\_io.pyx:293, in rasterio._io._delete_dataset_if_exists()

File rasterio\_err.pyx:195, in rasterio._err.exc_wrap_int()

CPLE_AppDefinedError: Deleting ../RandomForest/data_a/products2/sentinel_bands.tif failed: Permission denied
In [ ]:
full_dataset = rasterio.open(img_fp)
img_rows, img_cols = full_dataset.shape
img_bands = full_dataset.count
print(full_dataset.shape) # dimensions
print(full_dataset.count) # bands
(2197, 1478)
8
In [ ]:
full_dataset
Out[ ]:
<open DatasetReader name='../RandomForest/data_a/products2/sentinel_bands.tif' mode='r'>
In [ ]:
import numpy as np
from rasterio.plot import show
import matplotlib.pyplot as plt

img_fp = '../RandomForest/data_a/products2/sentinel_bands_proj.tif'

# Open the full extent of the raster dataset
with rasterio.open(img_fp) as dataset:
    # Read the specified bands (3, 2, 1 for RGB)
    img_data = dataset.read([3, 2, 1])
# Assuming you've already read the img_data
# Scale the data to 0-255 range, assuming it's 16-bit
img_data = np.clip(img_data / 4096 * 255, 0, 255).astype(np.uint8)

# Mask out no data values if necessary
no_data_value = -9999  # Replace with your actual no data value
img_data[img_data == no_data_value] = 0  # Set no data values to 0 for display

# Now display the image
fig, ax = plt.subplots(figsize=(10, 7))
show(img_data, ax=ax, transform=dataset.transform)

plt.show()
No description has been provided for this image
In [ ]:
full_dataset = rasterio.open(img_fp)
raster_crs = full_dataset.crs
In [ ]:
from rasterio.warp import calculate_default_transform, reproject, Resampling

# Define the source and destination coordinate reference systems
src_crs = 'EPSG:32632'
dst_crs = 'EPSG:4326'

# Open the original dataset
with rasterio.open('../RandomForest/data_a/products/sentinel_bands.tif') as src:
    # Calculate the transform and dimensions for the new dataset
    transform, width, height = calculate_default_transform(
        src.crs, dst_crs, src.width, src.height, *src.bounds)
    
    # Define the metadata for the new dataset
    kwargs = src.meta.copy()
    kwargs.update({
        'crs': dst_crs,
        'transform': transform,
        'width': width,
        'height': height
    })

    # Create the new dataset and reproject the original data into it
    with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif', 'w', **kwargs) as dst:
        for i in range(1, src.count + 1):  # Loop through all bands
            reproject(
                source=rasterio.band(src, i),
                destination=rasterio.band(dst, i),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                resampling=Resampling.nearest)
In [ ]:
shapefile = gpd.read_file('C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg')
shapefile.crs
Out[ ]:
<Projected CRS: EPSG:3035>
Name: ETRS89-extended / LAEA Europe
Axis Info [cartesian]:
- Y[north]: Northing (metre)
- X[east]: Easting (metre)
Area of Use:
- name: Europe - European Union (EU) countries and candidates. Europe - onshore and offshore: Albania; Andorra; Austria; Belgium; Bosnia and Herzegovina; Bulgaria; Croatia; Cyprus; Czechia; Denmark; Estonia; Faroe Islands; Finland; France; Germany; Gibraltar; Greece; Hungary; Iceland; Ireland; Italy; Kosovo; Latvia; Liechtenstein; Lithuania; Luxembourg; Malta; Monaco; Montenegro; Netherlands; North Macedonia; Norway including Svalbard and Jan Mayen; Poland; Portugal including Madeira and Azores; Romania; San Marino; Serbia; Slovakia; Slovenia; Spain including Canary Islands; Sweden; Switzerland; Türkiye (Turkey); United Kingdom (UK) including Channel Islands and Isle of Man; Vatican City State.
- bounds: (-35.58, 24.6, 44.83, 84.73)
Coordinate Operation:
- name: Europe Equal Area 2001
- method: Lambert Azimuthal Equal Area
Datum: European Terrestrial Reference System 1989 ensemble
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich
In [ ]:
shapefile = shapefile.to_crs(epsg=32632)
shapefile.crs
Out[ ]:
<Projected CRS: EPSG:32632>
Name: WGS 84 / UTM zone 32N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 6°E and 12°E, northern hemisphere between equator and 84°N, onshore and offshore. Algeria. Austria. Cameroon. Denmark. Equatorial Guinea. France. Gabon. Germany. Italy. Libya. Liechtenstein. Monaco. Netherlands. Niger. Nigeria. Norway. Sao Tome and Principe. Svalbard. Sweden. Switzerland. Tunisia. Vatican City State.
- bounds: (6.0, 0.0, 12.0, 84.0)
Coordinate Operation:
- name: UTM zone 32N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
shapefile
Out[ ]:
country fua_name fua_code code_2018 class_2018 prod_date identifier perimeter area comment Pop2018 geometry
0 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 425-DE033L1 155.888916 1185.135544 None 9 MULTIPOLYGON (((640671.445 5353635.798, 640668...
1 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 785-DE033L1 204.742805 2617.844445 None 53 MULTIPOLYGON (((638616.901 5358201.245, 638618...
2 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 751-DE033L1 397.305451 9451.982988 None 193 MULTIPOLYGON (((642184.268 5358123.727, 642109...
3 DE Augsburg DE033L1 11210 Discontinuous dense urban fabric (S.L. : 50% -... 2020-09 5156-DE033L1 2683.734912 35662.775997 None 166 MULTIPOLYGON (((647303.556 5360566.887, 647300...
4 DE Augsburg DE033L1 11100 Continuous urban fabric (S.L. : > 80%) 2020-09 1986-DE033L1 297.148595 4588.045376 None 60 MULTIPOLYGON (((642390.773 5359747.470, 642356...
... ... ... ... ... ... ... ... ... ... ... ... ...
25378 DE Augsburg DE033L1 31000 Forests 2020-09 24837-DE033L1 682.883091 27090.306391 None 0 MULTIPOLYGON (((664060.510 5368090.340, 664059...
25379 DE Augsburg DE033L1 32000 Herbaceous vegetation associations (natural gr... 2020-09 25099-DE033L1 485.354917 11090.189969 None 0 MULTIPOLYGON (((649972.813 5377568.573, 649975...
25380 DE Augsburg DE033L1 32000 Herbaceous vegetation associations (natural gr... 2020-09 25134-DE033L1 528.446541 11505.819504 None 0 MULTIPOLYGON (((649766.914 5385378.476, 649771...
25381 DE Augsburg DE033L1 50000 Water 2020-09 25235-DE033L1 568.167123 10591.167983 None 0 MULTIPOLYGON (((641260.177 5359490.598, 641256...
25382 DE Augsburg DE033L1 23000 Pastures 2020-09 23246-DE033L1 763.153970 18744.576748 None 0 MULTIPOLYGON (((643829.528 5381502.382, 643827...

25383 rows × 12 columns

In [ ]:
shapefile.groupby('code_2018')['class_2018'].unique()
Out[ ]:
code_2018
11100             [Continuous urban fabric (S.L. : > 80%)]
11210    [Discontinuous dense urban fabric (S.L. : 50% ...
11220    [Discontinuous medium density urban fabric (S....
11230    [Discontinuous low density urban fabric (S.L. ...
11240    [Discontinuous very low density urban fabric (...
11300                                [Isolated structures]
12100    [Industrial, commercial, public, military and ...
12210             [Fast transit roads and associated land]
12220                    [Other roads and associated land]
12230                       [Railways and associated land]
12400                                           [Airports]
13100                  [Mineral extraction and dump sites]
13300                                 [Construction sites]
13400                           [Land without current use]
14100                                  [Green urban areas]
14200                      [Sports and leisure facilities]
21000                         [Arable land (annual crops)]
23000                                           [Pastures]
31000                                            [Forests]
32000    [Herbaceous vegetation associations (natural g...
50000                                              [Water]
Name: class_2018, dtype: object
In [ ]:
shapefile = shapefile.to_crs({'init': 'epsg:4326'})
shapefile.crs
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)
Out[ ]:
<Geographic 2D CRS: +init=epsg:4326 +type=crs>
Name: WGS 84
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [ ]:
len(shapefile)
Out[ ]:
25383
In [ ]:
# this generates a list of shapely geometries
geoms = shapefile.geometry.values 

# let's grab a single shapely geometry to check
geometry = geoms[0] 
print(type(geometry))
print(geometry)

# transform to GeoJSON format
from shapely.geometry import mapping
feature = [mapping(geometry)] # can also do this using polygon.__geo_interface__
print(type(feature))
print(feature)
<class 'shapely.geometry.multipolygon.MultiPolygon'>
MULTIPOLYGON (((10.897603435546376 48.32024993125257, 10.897558639391013 48.32006889000404, 10.896796603606004 48.32015034274349, 10.896843224431512 48.32033682294202, 10.897603435546376 48.32024993125257)))
<class 'list'>
[{'type': 'MultiPolygon', 'coordinates': [(((10.897603435546376, 48.32024993125257), (10.897558639391013, 48.32006889000404), (10.896796603606004, 48.32015034274349), (10.896843224431512, 48.32033682294202), (10.897603435546376, 48.32024993125257)),)]}]
In [ ]:
out_image, out_transform = mask(full_dataset, feature, crop=True)
out_image.shape
Out[ ]:
(8, 3, 8)
In [ ]:
full_dataset.close()
In [ ]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from shapely.geometry import box


# Load the shapefile
#shapefile = gpd.read_file(shapefile_path)

# Check if the CRS matches, if not, reproject
if shapefile.crs != raster_crs:
    shapefile = shapefile.to_crs(raster_crs)

# Check that geometries are valid
shapefile['valid'] = shapefile.is_valid
shapefile = shapefile[shapefile['valid']]


with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
    raster_bounds = src.bounds
    raster_bbox = box(*raster_bounds)  # Create a bounding box from the raster bounds

    # Only proceed if the raster and shapefile overlap
    if not shapefile.unary_union.intersects(raster_bbox):
        raise ValueError("Shapefile and raster do not overlap")
    else:
        print("The shapefile and raster overlap.")

    # Extract the raster values within the polygon
    for geom in shapefile.geometry:
        feature = [mapping(geom)]
        out_image, out_transform = mask(src, feature, crop=True)
        # process out_image
The shapefile and raster overlap.
---------------------------------------------------------------------------
WindowError                               Traceback (most recent call last)
File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:80, in raster_geometry_mask(dataset, shapes, all_touched, invert, crop, pad, pad_width)
     79 try:
---> 80     window = geometry_window(dataset, shapes, pad_x=pad_x, pad_y=pad_y)
     82 except WindowError:
     83     # If shapes do not overlap raster, raise Exception or UserWarning
     84     # depending on value of crop

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\features.py:477, in geometry_window(dataset, shapes, pad_x, pad_y, north_up, rotated, pixel_precision, boundless)
    476 if not boundless:
--> 477     window = window.intersection(raster_window)
    479 return window

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:775, in Window.intersection(self, other)
    763 """Return the intersection of this window and another
    764 
    765 Parameters
   (...)
    773 Window
    774 """
--> 775 return intersection([self, other])

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:125, in iter_args.<locals>.wrapper(*args, **kwargs)
    124 if len(args) == 1 and isinstance(args[0], Iterable):
--> 125     return function(*args[0])
    126 else:

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:239, in intersection(*windows)
    226 """Innermost extent of window intersections.
    227 
    228 Will raise WindowError if windows do not intersect.
   (...)
    237 Window
    238 """
--> 239 return functools.reduce(_intersection, windows)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\windows.py:257, in _intersection(w1, w2)
    256 else:
--> 257     raise WindowError(f"Intersection is empty {w1} {w2}")

WindowError: Intersection is empty Window(col_off=2116, row_off=722, width=20, height=41) Window(col_off=0, row_off=0, width=1968, height=1913)

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[81], line 33
     31 for geom in shapefile.geometry:
     32     feature = [mapping(geom)]
---> 33     out_image, out_transform = mask(src, feature, crop=True)
     34     # process out_image

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:178, in mask(dataset, shapes, all_touched, invert, nodata, filled, crop, pad, pad_width, indexes)
    175     else:
    176         nodata = 0
--> 178 shape_mask, transform, window = raster_geometry_mask(
    179     dataset, shapes, all_touched=all_touched, invert=invert, crop=crop,
    180     pad=pad, pad_width=pad_width)
    182 if indexes is None:
    183     out_shape = (dataset.count, ) + shape_mask.shape

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\rasterio\mask.py:86, in raster_geometry_mask(dataset, shapes, all_touched, invert, crop, pad, pad_width)
     82 except WindowError:
     83     # If shapes do not overlap raster, raise Exception or UserWarning
     84     # depending on value of crop
     85     if crop:
---> 86         raise ValueError('Input shapes do not overlap raster.')
     87     else:
     88         warnings.warn('shapes are outside bounds of raster. '
     89                       'Are they in different coordinate reference systems?')

ValueError: Input shapes do not overlap raster.

Adjust Landcover Data to match the scene¶

In [ ]:
# Reproject the shapefile to match the raster's CRS
shapefile = shapefile.to_crs(epsg=32632)
In [ ]:
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
    print(src.bounds)
print(shapefile.total_bounds)
BoundingBox(left=10.7543531803083, bottom=48.25329694782757, right=10.97190533912122, top=48.46476914285253)
[ 610820.72860213 5327718.5402929   670858.48189483 5388978.22555986]
In [ ]:
shapefile = shapefile.to_crs("EPSG:4326")
print(shapefile.total_bounds)
[10.494824  48.0892135 11.3108095 48.6385815]
In [ ]:
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot

fig, ax = plt.subplots(figsize=(10, 10))
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
    rasterio.plot.show(src, ax=ax)
shapefile.plot(ax=ax, facecolor='none', edgecolor='red')
plt.show()
No description has been provided for this image
In [ ]:
from shapely.geometry import box
from shapely.affinity import scale

shapefile_path = 'C:/Users/leoni/Documents/Uni/UGS/RandomForest/data_a/landcover/Data/DE033L1_AUGSBURG_UA2018_v013.gpkg'

# Open the raster file and get its bounds
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
    bounds = src.bounds

# Create a slightly smaller bounding box geometry from the raster bounds
# Reduce each side of the bounding box by a small fraction, e.g., 0.99 to reduce by 1%
smaller_bbox = scale(box(bounds.left, bounds.bottom, bounds.right, bounds.top), xfact=0.99, yfact=0.99, origin='center')

# Create a GeoDataFrame from the smaller bounding box
smaller_bbox_gdf = gpd.GeoDataFrame({'geometry': [smaller_bbox]}, crs=src.crs)

# Load the shapefile
shapefile = gpd.read_file(shapefile_path)

# Make sure the shapefile is in the same CRS as the raster
shapefile = shapefile.to_crs(src.crs)

# Perform a spatial join to find features completely within the smaller bounding box
within_shapefile = gpd.sjoin(shapefile, smaller_bbox_gdf, op='within')

# Save the clipped shapefile to a new file
within_shapefile.to_file('within_shapefile_smaller_bbox.gpkg', driver='GPKG')
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\IPython\core\interactiveshell.py:3466: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.
  if await self.run_code(code, result, async_=asy):
In [ ]:
within_shapefile = within_shapefile.groupby('class_2018', group_keys=False).apply(lambda x: x.sample(n=10, replace=True))
within_shapefile
Out[ ]:
country fua_name fua_code code_2018 class_2018 prod_date identifier perimeter area comment Pop2018 geometry index_right
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
15079 DE Augsburg DE033L1 12400 Airports 2020-09 15062-DE033L1 5540.286317 1.073691e+06 None 0 MULTIPOLYGON (((10.92006 48.41900, 10.92004 48... 0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
22917 DE Augsburg DE033L1 50000 Water 2020-09 25255-DE033L1 1521.883149 1.113501e+04 None 0 MULTIPOLYGON (((10.87805 48.35996, 10.87788 48... 0
24755 DE Augsburg DE033L1 50000 Water 2020-09 25380-DE033L1 324.841752 2.014562e+03 None 0 MULTIPOLYGON (((10.89835 48.38594, 10.89837 48... 0
24697 DE Augsburg DE033L1 50000 Water 2020-09 25314-DE033L1 1389.142043 1.032410e+05 None 0 MULTIPOLYGON (((10.92348 48.44506, 10.92348 48... 0
5884 DE Augsburg DE033L1 50000 Water 2020-09 25293-DE033L1 406.403219 1.023780e+04 None 0 MULTIPOLYGON (((10.84367 48.45584, 10.84332 48... 0
22773 DE Augsburg DE033L1 50000 Water 2020-09 25236-DE033L1 497.513898 5.422245e+03 None 0 MULTIPOLYGON (((10.87693 48.37054, 10.87655 48... 0

210 rows × 13 columns

In [ ]:
within_shapefile.explore()
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from shapely.geometry import box


# Load the shapefile
#shapefile = gpd.read_file(shapefile_path)
shapefile = within_shapefile

# Check if the CRS matches, if not, reproject
if shapefile.crs != raster_crs:
    shapefile = shapefile.to_crs(raster_crs)

# Check that geometries are valid
shapefile['valid'] = shapefile.is_valid
shapefile = shapefile[shapefile['valid']]


with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
    raster_bounds = src.bounds
    raster_bbox = box(*raster_bounds)  # Create a bounding box from the raster bounds

    # Only proceed if the raster and shapefile overlap
    if not shapefile.unary_union.intersects(raster_bbox):
        raise ValueError("Shapefile and raster do not overlap")
    else:
        print("The shapefile and raster overlap.")

    # Extract the raster values within the polygon
    for geom in shapefile.geometry:
        feature = [mapping(geom)]
        out_image, out_transform = mask(src, feature, crop=True)
        # process out_image
The shapefile and raster overlap.
In [ ]:
import numpy as np

# Initialize a list to hold the mean values for each band
band_means = []

# Extract the raster values within the polygon
for geom in shapefile.geometry:
    feature = [mapping(geom)]
    
    with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
        out_image, out_transform = mask(src, feature, crop=True)
        
        # Calculate mean of each band, excluding no-data values
        means = np.ma.array(out_image, mask=out_image == src.nodata).mean(axis=(1, 2))
        band_means.append(means.filled(src.nodata))
In [ ]:
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
    raster_crs = src.crs
shapefile_crs = shapefile.crs

print("Raster CRS: ", raster_crs)
print("Shapefile CRS: ", shapefile_crs)
Raster CRS:  EPSG:4326
Shapefile CRS:  GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
In [ ]:
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
    raster_bounds = src.bounds

# Use the total_bounds attribute of the shapefile to get its bounding coordinates
shapefile_bounds = shapefile.total_bounds

print("Raster bounds: ", raster_bounds)
print("Shapefile bounds: ", shapefile_bounds)
Raster bounds:  BoundingBox(left=10.7543531803083, bottom=48.25329694782757, right=10.97190533912122, top=48.46476914285253)
Shapefile bounds:  [10.75694347 48.25553441 10.96999023 48.4636161 ]
In [ ]:
import matplotlib.pyplot as plt
import rasterio
import rasterio.plot

# Open the raster file
with rasterio.open('../RandomForest/data_a/products/sentinel_bands_proj.tif') as src:
    # Plot the raster
    fig, ax = plt.subplots()
    rasterio.plot.show(src, ax=ax)
    # Overlay the shapefile
    shapefile.plot(ax=ax, color='red', alpha=0.5)
    plt.show()
No description has been provided for this image
In [ ]:
import numpy as np
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
import geopandas as gpd
from rasterio.features import geometry_mask

img_fp = '../RandomForest/data_a/products/sentinel_bands_proj.tif'

X = np.array([], dtype=np.float32).reshape(0, 8)  # Replace '8' with the actual number of bands if different
y = []  # Initialize y as an empty list

incomplete_features = []

with rasterio.open(img_fp) as src:
    raster_crs = src.crs
    nodata = src.nodatavals[0]  # Assuming all bands have the same nodata value
    band_count = src.count

    # Iterate over the geometries in the shapefile
    for index, row in shapefile.iterrows():
        geom = row['geometry']
        # Create a mask for the current geometry
        geom_mask = geometry_mask([geom], transform=src.transform, invert=True, out_shape=(src.height, src.width))

        # Check if the geometry intersects with the raster
        if not geom_mask.all():
            out_image, out_transform = mask(src, [geom], crop=True, nodata=nodata)
            out_image_masked = np.ma.masked_equal(out_image, nodata)
            valid_data = out_image_masked.compressed()

            # Ensure we have a complete set of pixels for all bands
            if valid_data.size % band_count == 0:
                out_image_reshaped = valid_data.reshape(-1, band_count)

                # Remove rows that contain nodata values
                valid_rows = np.all(out_image_reshaped != nodata, axis=1)
                out_image_reshaped = out_image_reshaped[valid_rows]

                # Only proceed if we have valid data left after removing nodata rows
                if out_image_reshaped.size > 0:
                    X = np.vstack((X, out_image_reshaped))
                    y.extend([row['class_2018']] * out_image_reshaped.shape[0])
            else:
                # Collect indices of incomplete features to handle later
                incomplete_features.append(index)
                print(f"Feature {index} does not have a complete set of pixels.")

# Handle incomplete features as needed
# For example, you might want to drop them from the shapefile
shapefile = shapefile.drop(incomplete_features)
Feature 5525 does not have a complete set of pixels.
Feature 8841 does not have a complete set of pixels.
Feature 2792 does not have a complete set of pixels.
Feature 23790 does not have a complete set of pixels.
Feature 24478 does not have a complete set of pixels.
Feature 24478 does not have a complete set of pixels.
Feature 5859 does not have a complete set of pixels.
Feature 7959 does not have a complete set of pixels.
Feature 12454 does not have a complete set of pixels.
Feature 11810 does not have a complete set of pixels.
Feature 10615 does not have a complete set of pixels.
Feature 12162 does not have a complete set of pixels.
Feature 13507 does not have a complete set of pixels.
Feature 15178 does not have a complete set of pixels.
Feature 23246 does not have a complete set of pixels.
Feature 16438 does not have a complete set of pixels.
In [ ]:
print(shapefile)
      country  fua_name fua_code code_2018 class_2018 prod_date  \
15079      DE  Augsburg  DE033L1     12400   Airports   2020-09   
15079      DE  Augsburg  DE033L1     12400   Airports   2020-09   
15079      DE  Augsburg  DE033L1     12400   Airports   2020-09   
15079      DE  Augsburg  DE033L1     12400   Airports   2020-09   
15079      DE  Augsburg  DE033L1     12400   Airports   2020-09   
...       ...       ...      ...       ...        ...       ...   
22917      DE  Augsburg  DE033L1     50000      Water   2020-09   
24755      DE  Augsburg  DE033L1     50000      Water   2020-09   
24697      DE  Augsburg  DE033L1     50000      Water   2020-09   
5884       DE  Augsburg  DE033L1     50000      Water   2020-09   
22773      DE  Augsburg  DE033L1     50000      Water   2020-09   

          identifier    perimeter          area comment  Pop2018  \
15079  15062-DE033L1  5540.286317  1.073691e+06    None        0   
15079  15062-DE033L1  5540.286317  1.073691e+06    None        0   
15079  15062-DE033L1  5540.286317  1.073691e+06    None        0   
15079  15062-DE033L1  5540.286317  1.073691e+06    None        0   
15079  15062-DE033L1  5540.286317  1.073691e+06    None        0   
...              ...          ...           ...     ...      ...   
22917  25255-DE033L1  1521.883149  1.113501e+04    None        0   
24755  25380-DE033L1   324.841752  2.014562e+03    None        0   
24697  25314-DE033L1  1389.142043  1.032410e+05    None        0   
5884   25293-DE033L1   406.403219  1.023780e+04    None        0   
22773  25236-DE033L1   497.513898  5.422245e+03    None        0   

                                                geometry  index_right  valid  
15079  MULTIPOLYGON (((10.92006 48.41900, 10.92004 48...            0   True  
15079  MULTIPOLYGON (((10.92006 48.41900, 10.92004 48...            0   True  
15079  MULTIPOLYGON (((10.92006 48.41900, 10.92004 48...            0   True  
15079  MULTIPOLYGON (((10.92006 48.41900, 10.92004 48...            0   True  
15079  MULTIPOLYGON (((10.92006 48.41900, 10.92004 48...            0   True  
...                                                  ...          ...    ...  
22917  MULTIPOLYGON (((10.87805 48.35996, 10.87788 48...            0   True  
24755  MULTIPOLYGON (((10.89835 48.38594, 10.89837 48...            0   True  
24697  MULTIPOLYGON (((10.92348 48.44506, 10.92348 48...            0   True  
5884   MULTIPOLYGON (((10.84367 48.45584, 10.84332 48...            0   True  
22773  MULTIPOLYGON (((10.87693 48.37054, 10.87655 48...            0   True  

[194 rows x 14 columns]
In [ ]:
# Initialize empty arrays for the pixel values and labels
X = np.array([], dtype=np.float32).reshape(0, 8)  # Replace '8' with the number of bands
y = []

with rasterio.open(img_fp) as src:
    # Ensure the shapefile is in the same CRS as the raster
    shapefile = shapefile.to_crs(src.crs)
    nodata = src.nodatavals[0]  # Assuming all bands have the same nodata value

    # Loop through each feature in the shapefile
    for index, row in shapefile.iterrows():
        geom = row.geometry
        # Mask the raster with the geometry
        out_image, out_transform = mask(src, [geom], crop=True, nodata=nodata, filled=False)

        # Check if there is any valid data
        if np.any(out_image.mask == False):
            # Reshape and append to X and y
            valid_pixels = out_image.data[~out_image.mask].reshape(-1, src.count)
            X = np.vstack((X, valid_pixels))
            y.extend([row['class_2018']] * valid_pixels.shape[0])
In [ ]:
out_image
Out[ ]:
masked_array(
  data=[[[--, --, --, 238.0],
         [--, --, --, 196.0],
         [--, --, 306.0, 175.0],
         [--, 241.0, 183.0, 183.0],
         [--, 202.0, 175.0, 175.0],
         [--, 194.0, 160.0, 160.0],
         [--, 215.0, 169.0, 169.0],
         [--, 210.0, 161.0, 161.0],
         [--, 192.0, 157.0, 157.0],
         [--, 187.0, 181.0, 181.0],
         [--, 200.0, 218.0, 218.0],
         [--, 178.0, 189.0, 314.0],
         [--, 228.0, 197.0, 217.0],
         [--, 266.0, 196.0, 195.0],
         [--, 319.0, 252.0, 256.0],
         [--, 302.0, 280.0, 295.0],
         [--, 313.0, 301.0, 280.0],
         [--, 709.0, 414.0, 227.0],
         [1372.0, 1374.0, 1366.0, 1244.0],
         [--, --, --, --]],

        [[--, --, --, 661.0],
         [--, --, --, 644.0],
         [--, --, 680.0, 709.0],
         [--, 688.0, 588.0, 588.0],
         [--, 857.0, 905.0, 905.0],
         [--, 686.0, 685.0, 685.0],
         [--, 874.0, 987.0, 987.0],
         [--, 942.0, 1188.0, 1188.0],
         [--, 1178.0, 1114.0, 1114.0],
         [--, 1216.0, 920.0, 920.0],
         [--, 1122.0, 720.0, 720.0],
         [--, 987.0, 741.0, 673.0],
         [--, 873.0, 744.0, 684.0],
         [--, 1118.0, 1152.0, 1172.0],
         [--, 1152.0, 1162.0, 1230.0],
         [--, 1232.0, 1194.0, 1196.0],
         [--, 1254.0, 1144.0, 1128.0],
         [--, 517.0, 934.0, 822.0],
         [385.0, 490.0, 1014.0, 936.0],
         [--, --, --, --]],

        [[--, --, --, 608.0],
         [--, --, --, 582.0],
         [--, --, 501.0, 525.0],
         [--, 451.0, 626.0, 626.0],
         [--, 861.0, 891.0, 891.0],
         [--, 514.0, 681.0, 681.0],
         [--, 705.0, 991.0, 991.0],
         [--, 959.0, 1242.0, 1242.0],
         [--, 1216.0, 1216.0, 1216.0],
         [--, 1178.0, 958.0, 958.0],
         [--, 1080.0, 628.0, 628.0],
         [--, 915.0, 572.0, 618.0],
         [--, 1008.0, 686.0, 615.0],
         [--, 1226.0, 1126.0, 1162.0],
         [--, 1274.0, 1282.0, 1280.0],
         [--, 1432.0, 1310.0, 1310.0],
         [--, 1322.0, 1208.0, 1146.0],
         [--, 420.0, 946.0, 606.0],
         [281.0, 344.0, 1108.0, 764.0],
         [--, --, --, --]],

        [[--, --, --, 1070.375],
         [--, --, --, 1070.5],
         [--, --, 1004.0, 1089.5],
         [--, 1074.125, 1123.375, 1123.375],
         [--, 1168.375, 1172.125, 1172.125],
         [--, 1203.3125, 1251.4375, 1251.4375],
         [--, 1215.0625, 1290.1875, 1290.1875],
         [--, 1246.6875, 1331.0625, 1331.0625],
         [--, 1271.8125, 1343.9375, 1343.9375],
         [--, 1290.4375, 1328.8125, 1328.8125],
         [--, 1321.875, 1316.625, 1316.625],
         [--, 1394.9375, 1301.8125, 1276.9375],
         [--, 1408.3125, 1299.9375, 1279.3125],
         [--, 1409.9375, 1333.3125, 1338.5625],
         [--, 1399.8125, 1401.9375, 1454.6875],
         [--, 1334.0, 1408.5, 1503.75],
         [--, 1212.5, 1353.0, 1485.75],
         [--, 924.3125, 1214.4375, 1395.75],
         [687.25, 879.0, 1199.0, 1376.0],
         [--, --, --, --]],

        [[--, --, --, 2334.125],
         [--, --, --, 2360.25],
         [--, --, 2501.75, 2443.25],
         [--, 2594.375, 2509.625, 2509.625],
         [--, 2666.625, 2559.375, 2559.375],
         [--, 2520.3125, 2444.9375, 2444.9375],
         [--, 2360.875, 2288.125, 2288.125],
         [--, 2163.625, 2067.375, 2067.375],
         [--, 2069.875, 2017.125, 2017.125],
         [--, 2079.625, 2137.375, 2137.375],
         [--, 2114.4375, 2230.3125, 2230.3125],
         [--, 2150.75, 2286.75, 2326.625],
         [--, 2043.75, 2202.75, 2281.875],
         [--, 1904.375, 2047.125, 2139.0],
         [--, 1732.625, 1819.875, 1898.0],
         [--, 1719.9375, 1798.3125, 1880.6875],
         [--, 1866.3125, 1982.4375, 2087.0625],
         [--, 2073.0, 2304.0, 2476.5625],
         [2171.0625, 2252.9375, 2436.8125, 2577.5625],
         [--, --, --, --]],

        [[--, --, --, 2750.125],
         [--, --, --, 2765.75],
         [--, --, 2926.75, 2828.75],
         [--, 3039.3125, 2901.4375, 2901.4375],
         [--, 3140.4375, 2983.8125, 2983.8125],
         [--, 2915.1875, 2788.5625, 2788.5625],
         [--, 2725.75, 2602.75, 2602.75],
         [--, 2530.75, 2388.75, 2388.75],
         [--, 2408.375, 2337.125, 2337.125],
         [--, 2358.625, 2447.875, 2447.875],
         [--, 2370.9375, 2542.3125, 2542.3125],
         [--, 2396.8125, 2594.9375, 2631.8125],
         [--, 2225.4375, 2465.8125, 2573.9375],
         [--, 2041.9375, 2239.3125, 2363.1875],
         [--, 1846.3125, 1915.4375, 1999.5625],
         [--, 1864.25, 1902.25, 1986.25],
         [--, 2095.75, 2199.75, 2323.25],
         [--, 2446.4375, 2594.3125, 2740.0],
         [2712.0625, 2706.125, 2755.875, 2829.625],
         [--, --, --, --]],

        [[--, --, --, 2423.0],
         [--, --, --, 2773.0],
         [--, --, 3067.0, 3142.0],
         [--, 3073.0, 2661.0, 2661.0],
         [--, 2783.0, 2470.0, 2470.0],
         [--, 2878.0, 2832.0, 2832.0],
         [--, 2522.0, 2702.0, 2702.0],
         [--, 2509.0, 2066.0, 2066.0],
         [--, 2367.0, 1951.0, 1951.0],
         [--, 2114.0, 2273.0, 2273.0],
         [--, 2232.0, 2852.0, 2852.0],
         [--, 2374.0, 2863.0, 2763.0],
         [--, 2417.0, 2911.0, 2896.0],
         [--, 1781.0, 2116.0, 2314.0],
         [--, 1510.0, 1470.0, 1608.0],
         [--, 1742.0, 1666.0, 1537.0],
         [--, 1693.0, 2174.0, 2359.0],
         [--, 2164.0, 2284.0, 3173.0],
         [2413.0, 2697.0, 2470.0, 2996.0],
         [--, --, --, --]],

        [[--, --, --, 2957.6875],
         [--, --, --, 2948.9375],
         [--, --, 3192.4375, 3034.8125],
         [--, 3257.875, 3066.125, 3066.125],
         [--, 3259.125, 3042.875, 3042.875],
         [--, 3059.3125, 2893.4375, 2893.4375],
         [--, 2869.3125, 2758.9375, 2758.9375],
         [--, 2622.9375, 2581.8125, 2581.8125],
         [--, 2501.875, 2565.125, 2565.125],
         [--, 2506.125, 2708.875, 2708.875],
         [--, 2545.875, 2840.625, 2840.625],
         [--, 2550.3125, 2882.9375, 2940.5625],
         [--, 2333.4375, 2608.3125, 2732.6875],
         [--, 2132.5, 2334.0, 2468.375],
         [--, 1947.5, 2060.0, 2147.625],
         [--, 1927.625, 2038.375, 2131.6875],
         [--, 2072.875, 2269.125, 2420.5625],
         [--, 2489.75, 2772.25, 2965.875],
         [2729.0, 2768.375, 2975.125, 3101.375],
         [--, --, --, --]]],
  mask=[[[ True,  True,  True, False],
         [ True,  True,  True, False],
         [ True,  True, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [False, False, False, False],
         [ True,  True,  True,  True]],

        [[ True,  True,  True, False],
         [ True,  True,  True, False],
         [ True,  True, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [False, False, False, False],
         [ True,  True,  True,  True]],

        [[ True,  True,  True, False],
         [ True,  True,  True, False],
         [ True,  True, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [False, False, False, False],
         [ True,  True,  True,  True]],

        [[ True,  True,  True, False],
         [ True,  True,  True, False],
         [ True,  True, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [False, False, False, False],
         [ True,  True,  True,  True]],

        [[ True,  True,  True, False],
         [ True,  True,  True, False],
         [ True,  True, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [False, False, False, False],
         [ True,  True,  True,  True]],

        [[ True,  True,  True, False],
         [ True,  True,  True, False],
         [ True,  True, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [False, False, False, False],
         [ True,  True,  True,  True]],

        [[ True,  True,  True, False],
         [ True,  True,  True, False],
         [ True,  True, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [False, False, False, False],
         [ True,  True,  True,  True]],

        [[ True,  True,  True, False],
         [ True,  True,  True, False],
         [ True,  True, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [ True, False, False, False],
         [False, False, False, False],
         [ True,  True,  True,  True]]],
  fill_value=-3.4e+38,
  dtype=float32)
In [ ]:
# Assuming 'shapefile' is a GeoDataFrame that has been properly read and is in the correct CRS
geoms = shapefile.geometry.values  # This will give us a numpy array of geometry objects

X = np.array([], dtype=np.float32).reshape(0, band_count)  # Adjust dtype and band_count as needed
y = []

with rasterio.open(img_fp) as src:
    for geom in geoms:  # Loop through each geometry in the array
        feature = [mapping(geom)]  # Convert to format expected by rasterio.mask.mask

        out_image, out_transform = mask(src, feature, crop=True)
        
        # Check for pixels that are not nodata (neither 0 nor 255 in all bands)
        if out_image.any():  # If there's any non-nodata pixel
            # Filter out the nodata pixels and reshape
            out_image_reshaped = out_image[:, (out_image[0] != nodata) & (out_image[0] != 0) & (out_image[0] != 255)].reshape(-1, band_count)
            
            # Now, we need to get the corresponding class for each geometry
            class_label = shapefile.loc[shapefile.geometry == geom, 'class_2018'].values[0]
            
            # Extend the X and y arrays
            X = np.vstack((X, out_image_reshaped))  # Stack the pixels
            y.extend([class_label] * out_image_reshaped.shape[0])  # Extend the labels
In [ ]:
# What are our classification labels?
labels = np.unique(shapefile["class_2018"])
print('The training data include {n} classes: {classes}\n'.format(n=labels.size, 
                                                                classes=labels))

# We will need a "X" matrix containing our features, and a "y" array containing our labels
print('Our X matrix is sized: {sz}'.format(sz=X.shape))
print('Our y array is sized: {sz}'.format(sz=np.array(y).shape))
The training data include 21 classes: ['Airports' 'Arable land (annual crops)' 'Construction sites'
 'Continuous urban fabric (S.L. : > 80%)'
 'Discontinuous dense urban fabric (S.L. : 50% -  80%)'
 'Discontinuous low density urban fabric (S.L. : 10% - 30%)'
 'Discontinuous medium density urban fabric (S.L. : 30% - 50%)'
 'Discontinuous very low density urban fabric (S.L. : < 10%)'
 'Fast transit roads and associated land' 'Forests' 'Green urban areas'
 'Herbaceous vegetation associations (natural grassland, moors...)'
 'Industrial, commercial, public, military and private units'
 'Isolated structures' 'Land without current use'
 'Mineral extraction and dump sites' 'Other roads and associated land'
 'Pastures' 'Railways and associated land' 'Sports and leisure facilities'
 'Water']

Our X matrix is sized: (188286, 8)
Our y array is sized: (188286,)
In [ ]:
fig, ax = plt.subplots(figsize=[13, 6])

# Numbers 1-8 for bands
band_count = np.arange(1, 9)

# Convert y to numpy array for indexing
y_array = np.array(y)

# Iterate over unique classes
classes = np.unique(y_array)
for class_type in classes:
    # Calculate mean reflectance value for each band for the current class
    band_intensity = np.mean(X[y_array == class_type, :], axis=0)

    # Plot the mean reflectance values on the single plot
    ax.plot(band_count, band_intensity, label=class_type)

# Add axis labels
ax.set_xlabel('Band #')
ax.set_ylabel('Reflectance Value')

# Add a title
ax.set_title('Band Intensities Full Overview')

# Add legend outside of the plot
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))

# Adjust layout to accommodate the legend
plt.tight_layout()

# Display the plot
plt.show()
No description has been provided for this image

code_2018
11100 [Continuous urban fabric (S.L. : > 80%)]
11210 [Discontinuous dense urban fabric (S.L. : 50% ...
11220 [Discontinuous medium density urban fabric (S....
11230 [Discontinuous low density urban fabric (S.L. ...
11240 [Discontinuous very low density urban fabric (...
11300 [Isolated structures]
12100 [Industrial, commercial, public, military and ...
12210 [Fast transit roads and associated land]
12220 [Other roads and associated land]
12230 [Railways and associated land]
12400 [Airports]
13100 [Mineral extraction and dump sites]
13300 [Construction sites]
13400 [Land without current use]
14100 [Green urban areas]
14200 [Sports and leisure facilities]
21000 [Arable land (annual crops)]
23000 [Pastures]
31000 [Forests]
32000 [Herbaceous vegetation associations (natural g...
50000 [Water]
Name: class_2018, dtype: object

The training data include 21 classes: [
'Airports'
'Arable land (annual crops)'
'Construction sites'
'Continuous urban fabric (S.L. : > 80%)'
'Discontinuous dense urban fabric (S.L. : 50% - 80%)'
'Discontinuous low density urban fabric (S.L. : 10% - 30%)'
'Discontinuous medium density urban fabric (S.L. : 30% - 50%)'
'Discontinuous very low density urban fabric (S.L. : < 10%)'
'Fast transit roads and associated land'
'Forests'
'Green urban areas'
'Herbaceous vegetation associations (natural grassland, moors...)'
'Industrial, commercial, public, military and private units'
'Isolated structures'
'Land without current use'
'Mineral extraction and dump sites'
'Other roads and associated land'
'Pastures'
'Railways and associated land'
'Sports and leisure facilities'
'Water']

In [ ]:
def str_class_to_int(class_array):
    class_array[class_array == 'Airports'] = 0
    class_array[class_array == 'Arable land (annual crops)'] = 1
    class_array[class_array == 'Construction sites'] = 2
    class_array[class_array == 'Continuous urban fabric (S.L. : > 80%)'] = 3
    class_array[class_array == 'Discontinuous dense urban fabric (S.L. : 50% -  80%)'] = 4
    class_array[class_array == 'Discontinuous low density urban fabric (S.L. : 10% - 30%)'] = 5
    class_array[class_array == 'Discontinuous medium density urban fabric (S.L. : 30% - 50%)'] = 6
    class_array[class_array == 'Discontinuous very low density urban fabric (S.L. : < 10%)'] = 7
    class_array[class_array == 'Fast transit roads and associated land'] = 8
    class_array[class_array == 'Forests'] = 9
    class_array[class_array == 'Green urban areas'] = 10
    class_array[class_array == 'Herbaceous vegetation associations (natural grassland, moors...)'] = 11
    class_array[class_array == 'Industrial, commercial, public, military and private units'] = 12
    class_array[class_array == 'Isolated structures'] = 13
    class_array[class_array == 'Land without current use'] = 14
    class_array[class_array == 'Mineral extraction and dump sites'] = 15
    class_array[class_array == 'Other roads and associated land'] = 16
    class_array[class_array == 'Pastures'] = 17
    class_array[class_array == 'Railways and associated land'] = 18
    class_array[class_array == 'Sports and leisure facilities'] = 19
    class_array[class_array == 'Water'] = 20
    return(class_array.astype(int))

GaussianNB Naive Bayes¶

In [ ]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(X, y)
Out[ ]:
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GaussianNB()
In [ ]:
from rasterio.plot import show
from rasterio.plot import show_hist
from rasterio.windows import Window
from rasterio.plot import reshape_as_raster, reshape_as_image

Number of Bands: 8
Height: 450 pixels
Width: 1150 pixels
(8, 450, 1150)
(450, 1150, 8)

[:, 150:600, 250:1400] slices the array:
The : means all bands are included.
150:600 slices the rows (height) of the image.
250:1400 slices the columns (width) of the image.

img_data = src.read()

# Select specific bands (7, 3, 2) and the desired row and column ranges
# Adjust indices by subtracting 1 from the band number
selected_bands_img = img_data[[6, 2, 1], 150:600, 250:1400]

# Now, selected_bands_img.shape will give you the dimensions (number_of_selected_bands, height, width)
num_selected_bands, height, width = selected_bands_img.shape
In [ ]:
with rasterio.open(img_fp) as src:
    # may need to reduce this image size if your kernel crashes, takes a lot of memory
    img = src.read()[:, 400:950, 450:1500]

# Take our full image and reshape into long 2d array (nrow * ncol, nband) for classification
print(img.shape)
reshaped_img = reshape_as_image(img)
print(reshaped_img.shape)
(8, 550, 1050)
(550, 1050, 8)
In [ ]:
class_prediction = gnb.predict(reshaped_img.reshape(-1, 8))

# Reshape our classification map back into a 2D matrix so we can visualize it
class_prediction = class_prediction.reshape(reshaped_img[:, :, 0].shape)
In [ ]:
class_prediction = str_class_to_int(class_prediction)
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (255, 215, 0, 255)),    # Airports - Gold
    (1, (34, 139, 34, 255)),    # Arable Land - Forest Green
    (2, (255, 69, 0, 255)),     # Construction - Orange Red
    (3, (220, 20, 60, 255)),    # Continuous Urban Fabric - Crimson
    (4, (0, 191, 255, 255)),    # Discontinuous Dense Urban Fabric - Deep Sky Blue
    (5, (148, 0, 211, 255)),    # Discontinuous Low Density Urban Fabric - Dark Violet
    (6, (255, 140, 0, 255)),    # Discontinuous Medium Density Urban Fabric - Dark Orange
    (7, (32, 178, 170, 255)),   # Discontinuous Very Low Density Urban Fabric - Light Sea Green
    (8, (128, 0, 128, 255)),    # Fast Transit Roads and Associated Land - Purple
    (9, (34, 139, 34, 255)),    # Forests - Forest Green
    (10, (107, 142, 35, 255)),  # Green Urban Areas - Olive Drab
    (11, (154, 205, 50, 255)),  # Herbaceous Vegetation Associations - Yellow Green
    (12, (178, 34, 34, 255)),   # Industrial, Commercial, Public, Military and Private Units - Firebrick
    (13, (189, 183, 107, 255)), # Isolated Structures - Dark Khaki
    (14, (0, 128, 128, 255)),   # Land without Current Use - Teal
    (15, (165, 42, 42, 255)),   # Mineral Extraction and Dump Sites - Brown
    (16, (85, 107, 47, 255)),   # Other Roads and Associated Land - Dark Olive Green
    (17, (255, 99, 71, 255)),   # Pastures - Tomato
    (18, (0, 0, 128, 255)),     # Railways and Associated Land - Navy
    (19, (72, 61, 139, 255)),   # Sports and Leisure Facilities - Dark Slate Blue
    (20, (70, 130, 180, 255)),  # Water - Steel Blue
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap

# Define the class labels and their corresponding colors
class_labels = {
    0: "Airports",
    1: "Arable Land",
    2: "Construction",
    3: "Continuous Urban Fabric",
    4: "Discontinuous Dense Urban Fabric",
    5: "Discontinuous Low Density Urban Fabric",
    6: "Discontinuous Medium Density Urban Fabric",
    7: "Discontinuous Very Low Density Urban Fabric",
    8: "Fast Transit Roads and Associated Land",
    9: "Forests",
    10: "Green Urban Areas",
    11: "Herbaceous Vegetation Associations",
    12: "Industrial, Commercial, Public, Military and Private Unit",
    13: "Isolated Structures",
    14: "Land without Current Use",
    15: "Mineral Extraction and Dump Sites",
    16: "Other Roads and Associated Land",
    17: "Pastures",
    18: "Railways and Associated Land",
    19: "Sports and Leisure Facilities",
    20: "Water",
}

patches = [mpatches.Patch(color=colors[key], label=class_labels[key]) for key in class_labels]

fig, axs = plt.subplots(2,1,figsize=(20,15))

img_stretched = color_stretch(reshaped_img, [7, 3, 2])
axs[0].imshow(img_stretched)

axs[1].imshow(class_prediction, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

fig.show()
C:\Users\leoni\AppData\Local\Temp\ipykernel_17152\2740226285.py:39: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
  fig.show()
No description has been provided for this image
In [ ]:
with rasterio.open(img_fp) as src:
    green_band = src.read(3)
    red_band = src.read(4)
    nir_band = src.read(8)
    
ndwi = (green_band.astype(float) - nir_band.astype(float)) / (green_band.astype(float) + nir_band.astype(float))
ndvi = (nir_band.astype(float) - red_band.astype(float)) / (red_band.astype(float) + nir_band.astype(float))
In [ ]:
ndwi = ndwi[150:600, 250:1400]
ndvi = ndvi[150:600, 250:1400]
In [ ]:
fig, axs = plt.subplots(2,2,figsize=(15,7))

img_stretched = color_stretch(reshaped_img, [3, 2, 1])
axs[0,0].imshow(img_stretched)

axs[0,1].imshow(class_prediction, cmap=cmap, interpolation='none')

nwdi_plot = axs[1,0].imshow(ndwi, cmap="RdYlGn")
axs[1,0].set_title("NDWI")
fig.colorbar(nwdi_plot, ax=axs[1,0])

ndvi_plot = axs[1,1].imshow(ndvi, cmap="RdYlGn")
axs[1,1].set_title("NDVI")
fig.colorbar(ndvi_plot, ax=axs[1,1])

plt.show()
No description has been provided for this image
In [ ]:
fig, axs = plt.subplots(1,2,figsize=(15,15))

img_stretched = color_stretch(reshaped_img, [3, 2, 1])
axs[0].imshow(img_stretched[0:180, 160:350])

axs[1].imshow(class_prediction[0:180, 160:350], cmap=cmap, interpolation='none')

fig.show()
C:\Users\leoni\AppData\Local\Temp\ipykernel_17152\1670099218.py:8: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
  fig.show()
No description has been provided for this image

Random Forest¶

In [ ]:
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import rasterio
import matplotlib.pyplot as plt

# Create the Random Forest model
rf = RandomForestClassifier()

# Train the model
rf.fit(X, y)
Out[ ]:
RandomForestClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier()
In [ ]:
with rasterio.open(img_fp) as src:
    img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)

# Predict the classes using the Random Forest model
class_prediction = rf.predict(reshaped_img.reshape(-1, 8))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction) 
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (255, 215, 0, 255)),    # Airports - Gold
    (1, (34, 139, 34, 255)),    # Arable Land - Forest Green
    (2, (255, 69, 0, 255)),     # Construction - Orange Red
    (3, (220, 20, 60, 255)),    # Continuous Urban Fabric - Crimson
    (4, (0, 191, 255, 255)),    # Discontinuous Dense Urban Fabric - Deep Sky Blue
    (5, (148, 0, 211, 255)),    # Discontinuous Low Density Urban Fabric - Dark Violet
    (6, (255, 140, 0, 255)),    # Discontinuous Medium Density Urban Fabric - Dark Orange
    (7, (32, 178, 170, 255)),   # Discontinuous Very Low Density Urban Fabric - Light Sea Green
    (8, (128, 0, 128, 255)),    # Fast Transit Roads and Associated Land - Purple
    (9, (34, 139, 34, 255)),    # Forests - Forest Green
    (10, (107, 142, 35, 255)),  # Green Urban Areas - Olive Drab
    (11, (154, 205, 50, 255)),  # Herbaceous Vegetation Associations - Yellow Green
    (12, (178, 34, 34, 255)),   # Industrial, Commercial, Public, Military and Private Units - Firebrick
    (13, (189, 183, 107, 255)), # Isolated Structures - Dark Khaki
    (14, (0, 128, 128, 255)),   # Land without Current Use - Teal
    (15, (165, 42, 42, 255)),   # Mineral Extraction and Dump Sites - Brown
    (16, (85, 107, 47, 255)),   # Other Roads and Associated Land - Dark Olive Green
    (17, (255, 99, 71, 255)),   # Pastures - Tomato
    (18, (0, 0, 128, 255)),     # Railways and Associated Land - Navy
    (19, (72, 61, 139, 255)),   # Sports and Leisure Facilities - Dark Slate Blue
    (20, (70, 130, 180, 255)),  # Water - Steel Blue
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap

# Assuming reshaped_img is in the correct format
expected_shape = reshaped_img[:, :, 0].shape
class_prediction_2d = class_prediction.reshape(expected_shape)

fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = color_stretch(reshaped_img, [7, 3, 2])
axs[0].imshow(img_stretched)

# Use the reshaped 2D array for visualization
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image

KMeans¶

In [ ]:
from sklearn.cluster import KMeans

bands, rows, cols = img.shape

k = 21 # num of clusters

kmeans_predictions = KMeans(n_clusters=k, random_state=0).fit(reshaped_img.reshape(-1, 8))

kmeans_predictions_2d = kmeans_predictions.labels_.reshape(rows, cols)

# Now show the classmap next to the image
fig, axs = plt.subplots(1,2,figsize=(15,8))

img_stretched = color_stretch(reshaped_img, [7, 3, 2]) #try different band combinations
axs[0].imshow(img_stretched)

axs[1].imshow(kmeans_predictions_2d)
c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
Out[ ]:
<matplotlib.image.AxesImage at 0x2935337f310>
No description has been provided for this image

KNeighbours¶

In [ ]:
from sklearn.neighbors import KNeighborsClassifier

# Create the KNN model
knn = KNeighborsClassifier()

# Train the model
knn.fit(X, y)
Out[ ]:
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsClassifier()
In [ ]:
with rasterio.open(img_fp) as src:
    img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)

# Predict the classes using the KNN model
class_prediction = knn.predict(reshaped_img.reshape(-1, 8))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
In [ ]:
def color_stretch(image, index):
    colors = image[:, :, index].astype(np.float64)
    for b in range(colors.shape[2]):
        colors[:, :, b] = rasterio.plot.adjust_band(colors[:, :, b])
    return colors
    
# find the highest pixel value in the prediction image
n = int(np.max(class_prediction))

# next setup a colormap for our map
colors = dict((
    (0, (255, 215, 0, 255)),    # Airports - Gold
    (1, (34, 139, 34, 255)),    # Arable Land - Forest Green
    (2, (255, 69, 0, 255)),     # Construction - Orange Red
    (3, (220, 20, 60, 255)),    # Continuous Urban Fabric - Crimson
    (4, (0, 191, 255, 255)),    # Discontinuous Dense Urban Fabric - Deep Sky Blue
    (5, (148, 0, 211, 255)),    # Discontinuous Low Density Urban Fabric - Dark Violet
    (6, (255, 140, 0, 255)),    # Discontinuous Medium Density Urban Fabric - Dark Orange
    (7, (32, 178, 170, 255)),   # Discontinuous Very Low Density Urban Fabric - Light Sea Green
    (8, (128, 0, 128, 255)),    # Fast Transit Roads and Associated Land - Purple
    (9, (34, 139, 34, 255)),    # Forests - Forest Green
    (10, (107, 142, 35, 255)),  # Green Urban Areas - Olive Drab
    (11, (154, 205, 50, 255)),  # Herbaceous Vegetation Associations - Yellow Green
    (12, (178, 34, 34, 255)),   # Industrial, Commercial, Public, Military and Private Units - Firebrick
    (13, (189, 183, 107, 255)), # Isolated Structures - Dark Khaki
    (14, (0, 128, 128, 255)),   # Land without Current Use - Teal
    (15, (165, 42, 42, 255)),   # Mineral Extraction and Dump Sites - Brown
    (16, (85, 107, 47, 255)),   # Other Roads and Associated Land - Dark Olive Green
    (17, (255, 99, 71, 255)),   # Pastures - Tomato
    (18, (0, 0, 128, 255)),     # Railways and Associated Land - Navy
    (19, (72, 61, 139, 255)),   # Sports and Leisure Facilities - Dark Slate Blue
    (20, (70, 130, 180, 255)),  # Water - Steel Blue
))

# Put 0 - 255 as float 0 - 1
for k in colors:
    v = colors[k]
    _v = [_v / 255.0 for _v in v]
    colors[k] = _v
    
index_colors = [colors[key] if key in colors else 
                (255, 255, 255, 0) for key in range(0, n+1)]

cmap = plt.matplotlib.colors.ListedColormap(index_colors, 'Classification', n+1)
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = color_stretch(reshaped_img, [7, 3, 2])
axs[0].imshow(img_stretched)

class_prediction_2d = class_prediction.reshape(reshaped_img[:, :, 0].shape)
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image

Decision Tree¶

In [ ]:
from sklearn.tree import DecisionTreeClassifier

# Create the Decision Tree model
dt = DecisionTreeClassifier()

# Train the model
dt.fit(X, y)
Out[ ]:
DecisionTreeClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
In [ ]:
# Read and reshape the image
with rasterio.open(img_fp) as src:
    img = src.read()[:, 400:950, 450:1500]
reshaped_img = reshape_as_image(img)

# Predict the classes using the Decision Tree model
class_prediction = dt.predict(reshaped_img.reshape(-1, 8))

# Reshape the classification map for visualization
class_prediction = str_class_to_int(class_prediction)
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(20, 15))

img_stretched = color_stretch(reshaped_img, [7, 3, 2])
axs[0].imshow(img_stretched)

class_prediction_2d = class_prediction.reshape(reshaped_img[:, :, 0].shape)
axs[1].imshow(class_prediction_2d, cmap=cmap, interpolation='none')
axs[1].legend(handles=patches, loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0.)

plt.show()
No description has been provided for this image
In [ ]:
from sklearn.gaussian_process import GaussianProcessClassifier

# Create the Gaussian Process model
gp = GaussianProcessClassifier()
In [ ]:
from sklearn.model_selection import train_test_split

# Suppose you want to use 20% of your data
X_subset, _, y_subset, _ = train_test_split(X, y, test_size=0.9, random_state=42)
In [ ]:
# Due to computational intensity, consider using a subset of data or reducing the resolution
# Train the model
gp.fit(X_subset, y_subset)
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
Cell In[153], line 3
      1 # Due to computational intensity, consider using a subset of data or reducing the resolution
      2 # Train the model
----> 3 gp.fit(X_subset, y_subset)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1145     estimator._validate_params()
   1147 with config_context(
   1148     skip_parameter_validation=(
   1149         prefer_skip_nested_validation or global_skip_validation
   1150     )
   1151 ):
-> 1152     return fit_method(estimator, *args, **kwargs)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\gaussian_process\_gpc.py:741, in GaussianProcessClassifier.fit(self, X, y)
    738     else:
    739         raise ValueError("Unknown multi-class mode %s" % self.multi_class)
--> 741 self.base_estimator_.fit(X, y)
    743 if self.n_classes_ > 2:
    744     self.log_marginal_likelihood_value_ = np.mean(
    745         [
    746             estimator.log_marginal_likelihood()
    747             for estimator in self.base_estimator_.estimators_
    748         ]
    749     )

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1145     estimator._validate_params()
   1147 with config_context(
   1148     skip_parameter_validation=(
   1149         prefer_skip_nested_validation or global_skip_validation
   1150     )
   1151 ):
-> 1152     return fit_method(estimator, *args, **kwargs)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\multiclass.py:339, in OneVsRestClassifier.fit(self, X, y)
    335 columns = (col.toarray().ravel() for col in Y.T)
    336 # In cases where individual estimators are very fast to train setting
    337 # n_jobs > 1 in can results in slower performance due to the overhead
    338 # of spawning threads.  See joblib issue #112.
--> 339 self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
    340     delayed(_fit_binary)(
    341         self.estimator,
    342         X,
    343         column,
    344         classes=[
    345             "not %s" % self.label_binarizer_.classes_[i],
    346             self.label_binarizer_.classes_[i],
    347         ],
    348     )
    349     for i, column in enumerate(columns)
    350 )
    352 if hasattr(self.estimators_[0], "n_features_in_"):
    353     self.n_features_in_ = self.estimators_[0].n_features_in_

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\utils\parallel.py:65, in Parallel.__call__(self, iterable)
     60 config = get_config()
     61 iterable_with_config = (
     62     (_with_config(delayed_func, config), args, kwargs)
     63     for delayed_func, args, kwargs in iterable
     64 )
---> 65 return super().__call__(iterable_with_config)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\joblib\parallel.py:1863, in Parallel.__call__(self, iterable)
   1861     output = self._get_sequential_output(iterable)
   1862     next(output)
-> 1863     return output if self.return_generator else list(output)
   1865 # Let's create an ID that uniquely identifies the current call. If the
   1866 # call is interrupted early and that the same instance is immediately
   1867 # re-used, this id will be used to prevent workers that were
   1868 # concurrently finalizing a task from the previous call to run the
   1869 # callback.
   1870 with self._lock:

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\joblib\parallel.py:1792, in Parallel._get_sequential_output(self, iterable)
   1790 self.n_dispatched_batches += 1
   1791 self.n_dispatched_tasks += 1
-> 1792 res = func(*args, **kwargs)
   1793 self.n_completed_tasks += 1
   1794 self.print_progress()

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\utils\parallel.py:127, in _FuncWrapper.__call__(self, *args, **kwargs)
    125     config = {}
    126 with config_context(**config):
--> 127     return self.function(*args, **kwargs)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\multiclass.py:90, in _fit_binary(estimator, X, y, classes)
     88 else:
     89     estimator = clone(estimator)
---> 90     estimator.fit(X, y)
     91 return estimator

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\gaussian_process\_gpc.py:256, in _BinaryGaussianProcessClassifierLaplace.fit(self, X, y)
    254     self.log_marginal_likelihood_value_ = -np.min(lml_values)
    255 else:
--> 256     self.log_marginal_likelihood_value_ = self.log_marginal_likelihood(
    257         self.kernel_.theta
    258     )
    260 # Precompute quantities required for predictions which are independent
    261 # of actual query points
    262 K = self.kernel_(self.X_train_)

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\gaussian_process\_gpc.py:385, in _BinaryGaussianProcessClassifierLaplace.log_marginal_likelihood(self, theta, eval_gradient, clone_kernel)
    381     K = kernel(self.X_train_)
    383 # Compute log-marginal-likelihood Z and also store some temporaries
    384 # which can be reused for computing Z's gradient
--> 385 Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)
    387 if not eval_gradient:
    388     return Z

File c:\Users\leoni\Programme\Miniconda\envs\geo\Lib\site-packages\sklearn\gaussian_process\_gpc.py:443, in _BinaryGaussianProcessClassifierLaplace._posterior_mode(self, K, return_temporaries)
    441 W_sr = np.sqrt(W)
    442 W_sr_K = W_sr[:, np.newaxis] * K
--> 443 B = np.eye(W.shape[0]) + W_sr_K * W_sr
    444 L = cholesky(B, lower=True)
    445 # Line 6

MemoryError: Unable to allocate 2.64 GiB for an array with shape (18828, 18828) and data type float64
In [ ]:
from sklearn.decomposition import PCA

pca = PCA(n_components=0.95)  # Retain 95% of the variance
X_pca = pca.fit_transform(X)
X_subset, _, y_subset, _ = train_test_split(X_pca, y, test_size=0.7, random_state=42)
In [ ]:
from sklearn.svm import SVC
import numpy as np
import rasterio
import matplotlib.pyplot as plt

# Create the SVM model with RBF kernel
svm = SVC(kernel='rbf')

# Train the model
svm.fit(X, y)
In [ ]: